library(tidyverse)
library(janitor)
library(rsprite2)
library(scrutiny) 
library(purrr)
library(patchwork)
library(knitr)
library(kableExtra)

min_decimals <- function(x, digits = 2) {
  sprintf(paste0("%.", digits, "f"), x)
}

plot_helper <- function(dat, color_bounds = TRUE){
  
  dat <- dat |>
    mutate(label = case_when(mean < 1 | mean > 7 ~ "Impossible value flagged as possible",
                             sd < 0 | sd > 3.6 ~ "Impossible value flagged as possible", # Croucher (2004) loose heuristic of max SD as 60% of range (ie 6 * .6 = 3.6)
                             TRUE ~ "Possible value flagged as possible"))
  
  if(color_bounds == FALSE){
    
    # only GRIM-consistent values
    p_grim <- dat |>
      filter(grim) |>
      ggplot(aes(mean, sd)) +
      geom_point(shape = 15, size = 0.5, color = "grey35") + 
      theme_linedraw() +
      scale_y_continuous(breaks = scales::breaks_pretty(n = 8), 
                         limit = c(0, 4), 
                         expand = c(0.01, 0.01)) +
      scale_x_continuous(breaks = scales::breaks_pretty(n = 7), 
                         limit = c(0.5, 7.5), 
                         expand = c(0.01, 0.01)) +
      scale_color_viridis_d(begin = 0.3, end = 0.7) +
      ylab("Standard Deviation") +
      xlab("Mean") +
      ggtitle("GRIM consistent values") 
    
    # only GRIM and GRIMMER-consistent values
    p_grimmer <- dat |>
      filter(grim & grimmer) |>
      ggplot(aes(mean, sd)) +
      geom_point(shape = 15, size = 0.5, color = "grey35") +
      theme_linedraw() +
      scale_y_continuous(breaks = scales::breaks_pretty(n = 8), 
                         limit = c(0, 4), 
                         expand = c(0.01, 0.01)) +
      scale_x_continuous(breaks = scales::breaks_pretty(n = 7), 
                         limit = c(0.5, 7.5), 
                         expand = c(0.01, 0.01)) +
      scale_color_viridis_d(begin = 0.3, end = 0.7) +
      ylab("Standard Deviation") +
      xlab("Mean") +
      ggtitle("GRIM + GRIMMER consistent values") 
    
    # only GRIM and GRIMMER and TIDES-consistent values
    p_tides <- dat |>
      filter(grim & grimmer & tides) |>
      ggplot(aes(mean, sd)) +
      geom_point(shape = 15, size = 0.5, color = "grey35") +
      theme_linedraw() +
      scale_y_continuous(breaks = scales::breaks_pretty(n = 8), 
                         limit = c(0, 4), 
                         expand = c(0.01, 0.01)) +
      scale_x_continuous(breaks = scales::breaks_pretty(n = 7), 
                         limit = c(0.5, 7.5), 
                         expand = c(0.01, 0.01)) +
      ylab("Standard Deviation") +
      xlab("Mean") +
      ggtitle("GRIM + GRIMMER + TIDES consistent values")
    
    return(p_grim + 
             p_grimmer + 
             p_tides + 
             plot_layout(ncol = 1))
  }  
  
  if(color_bounds){
    
    # with coloring of impossible values
    # only GRIM-consistent values with colors
    p_temp_with_colors <- dat |>
      filter(grim) |>
      # mutate(label = case_when(grim == FALSE ~ "Impossible value flagged as impossible",
      #                          mean < 1 | mean > 7 ~ "Impossible value flagged as possible",
      #                          sd < 0 | sd > 3.6 ~ "Impossible value flagged as possible", # Croucher (2004) loose heuristic of max SD as 60% of range (ie 6 * .6 = 3.6)
      #                          TRUE ~ "Possible value flagged as possible")) |>
      ggplot(aes(mean, sd, color = label)) +
      geom_point(shape = 15, size = 0.5) + # "grey20"
      theme_linedraw() +
      scale_y_continuous(breaks = scales::breaks_pretty(n = 8), 
                         limit = c(0, 4), 
                         expand = c(0.01, 0.01)) +
      scale_x_continuous(breaks = scales::breaks_pretty(n = 7), 
                         limit = c(0.5, 7.5), 
                         expand = c(0.01, 0.01)) +
      scale_color_viridis_d(begin = 0.3, end = 0.7) +
      ylab("Standard Deviation") +
      xlab("Mean") +
      ggtitle("GRIM consistent values") +
      theme(legend.position = "top",
            legend.direction = "vertical") +
      guides(color = guide_legend(override.aes = list(size = 4, ncol = 1), title = NULL))
    
    # Extract legend grob - magic chatGPT code no idea how it works
    legend_grob <- ggplotGrob(p_temp_with_colors)$grobs[[which(sapply(ggplotGrob(p_temp_with_colors)$grobs, function(x) x$name) == "guide-box")]]
    
    # Convert the legend grob into a patchwork-compatible object
    legend <- wrap_elements(grid::grobTree(legend_grob))
    
    # grim plot without legend
    p_grim_with_colors <- p_temp_with_colors +
      theme(legend.position = "none")
    
    # only GRIM and GRIMMER-consistent values with colors
    p_grimmer_with_colors <- dat |>
      filter(grim & grimmer) |>
      # mutate(label = case_when(grim == FALSE | grimmer == FALSE ~ "Impossible value flagged as impossible",
      #                          mean < 1 | mean > 7 ~ "Impossible value flagged as possible",
      #                          sd < 0 | sd > 3.6 ~ "Impossible value flagged as possible", # Croucher (2004) loose heuristic of max SD as 60% of range (ie 6 * .6 = 3.6)
      #                          TRUE ~ "Possible value flagged as possible")) |>
      ggplot(aes(mean, sd, color = label)) +
      geom_point(shape = 15, size = 0.5) +
      theme_linedraw() +
      scale_y_continuous(breaks = scales::breaks_pretty(n = 8), 
                         limit = c(0, 4), 
                         expand = c(0.01, 0.01)) +
      scale_x_continuous(breaks = scales::breaks_pretty(n = 7), 
                         limit = c(0.5, 7.5), 
                         expand = c(0.01, 0.01)) +
      scale_color_viridis_d(begin = 0.3, end = 0.7) +
      ylab("Standard Deviation") +
      xlab("Mean") +
      ggtitle("GRIM + GRIMMER consistent values") +
      theme(legend.position = "none")
    
    # only GRIM and GRIMMER and TIDES-consistent values with colors
    p_tides_with_colors <- dat |>
      filter(grim & grimmer & tides) |>
      # mutate(label = case_when(grim == FALSE | grimmer == FALSE | tides == FALSE ~ "Impossible value flagged as impossible",
      #                          mean < 1 | mean > 7 ~ "Impossible value flagged as possible",
      #                          sd < 0 | sd > 3.6 ~ "Impossible value flagged as possible", # Croucher (2004) loose heuristic of max SD as 60% of range (ie 6 * .6 = 3.6)
      #                          TRUE ~ "Possible value flagged as possible")) |>
      ggplot(aes(mean, sd, color = label)) +
      geom_point(shape = 15, size = 0.5) +
      theme_linedraw() +
      scale_y_continuous(breaks = scales::breaks_pretty(n = 8), 
                         limit = c(0, 4), 
                         expand = c(0.01, 0.01)) +
      scale_x_continuous(breaks = scales::breaks_pretty(n = 7), 
                         limit = c(0.5, 7.5), 
                         expand = c(0.01, 0.01)) +
      scale_color_viridis_d(begin = 0.3, end = 0.7, direction = -1) +
      ylab("Standard Deviation") +
      xlab("Mean") +
      ggtitle("GRIM + GRIMMER + TIDES consistent values") +
      theme(legend.position = "none")
    
    return(legend +
             p_grim_with_colors + 
             p_grimmer_with_colors + 
             p_tides_with_colors + 
             plot_layout(ncol = 1, heights = c(.1, 1, 1, 1)))
  }
}

# proportion of possible values, considering only those where mean is within the scale bounds and SD is within Croucher's (2004) loose heuristic of max SD as 60% of range (ie 6 * .6 = 3.6)
table_helper <- function(dat, max_sd){
  dat_feasible <- dat |>
    filter(mean >= 1 & mean <= 7 & sd >= 0 & sd <= max_sd)
  
  data.frame(check = c("GRIM", "GRIM+GRIMMER", "GRIM+GRIMMER+TIDES"),
             n_total = dat_feasible |>
               count(name = "n_total"),
             n_possible = c(dat_feasible |>
                              filter(grim) |>
                              count() |>
                              pull(n),
                            dat_feasible |>
                              filter(grim, grimmer) |>
                              count() |>
                              pull(n),
                            dat_feasible |>
                              filter(grim, grimmer, tides) |>
                              count() |>
                              pull(n))) |>
    mutate(proportion = janitor::round_half_up(n_possible/n_total, 3)) |>
    kable(align = "r") |>
    kable_classic(full_width = FALSE)
}

GRIM vs GRIM+GRIMMER vs GRIM+GRIMMER+Umbrella plot

using scrutiny + tides_modified_from_.sd_limits()

tides_modified_from_.sd_limits <- function(n_obs, mean, sd, min_val, max_val, sd_prec = NULL, n_items = 1) {
  
  if (is.null(sd_prec)) {
    sd_prec <- max(nchar(sub("^[0-9]*", "", mean)) - 1, 0)
  }
  
  result <- c(-Inf, Inf)
  
  aMax <- min_val
  aMin <- floor(mean*n_items)/n_items
  bMax <- min(max(max_val, min_val + 1, aMin + 1), max_val)   # Adjusted here
  bMin <- min(aMin + 1/n_items, max_val)                      # Adjusted here
  total <- round(mean * n_obs * n_items)/n_items
  
  poss_values <- max_val
  for (i in seq_len(n_items)) {
    poss_values <- c(poss_values, min_val:(max_val-1) + (1 / n_items) * (i - 1))
  }
  poss_values <- sort(poss_values)
  
  for (abm in list(c(aMin, bMin, 1), c(aMax, bMax, 2))) {
    
    a <- abm[1]
    b <- abm[2]
    m <- abm[3]
    
    # Adjust a and b to be within min_val and max_val
    a <- min(max(a, min_val), max_val)
    b <- min(max(b, min_val), max_val)
    
    if (a == b) {
      vec <- rep(a, n_obs)
    } else {
      k <- round((total - (n_obs * b)) / (a - b))
      k <- min(max(k, 1), n_obs - 1)
      vec <- c(rep(a, k), rep(b, n_obs - k))
      diff <- sum(vec) - total
      
      if ((diff < 0)) {
        vec <- c(rep(a, k - 1), a + abs(diff), rep(b, n_obs - k))
      } else if ((diff > 0)) {
        vec <- c(rep(a, k), b - diff, rep(b, n_obs - k - 1))
      }
    }
    
    # instead of throwing errors here, return NA
    # if (round(mean(vec), sd_prec) != round(mean, sd_prec) | !all(floor(vec*10e9) %in% floor(poss_values*10e9))) {
    #   stop("Error in calculating range of possible standard deviations")
    # }
    # result[m] <- round(sd(vec), sd_prec)
    
    # Check if the calculated mean and values match expected conditions
    if (round(mean(vec), sd_prec) == round(mean, sd_prec) & all(floor(vec*10e9) %in% floor(poss_values*10e9))) {
      result[m] <- round(sd(vec), sd_prec)
    }
    
  }
  
  # Replace Inf or -Inf with NA
  result[is.infinite(result)] <- NA
  
  # returns df instead of vector
  res <- 
    data.frame(sd_min = result[1],
               sd_max = result[2]) |>
    mutate(tides = case_when(sd < sd_min ~ FALSE,
                             is.na(sd_min) ~ FALSE,
                             sd > sd_max ~ FALSE,
                             is.na(sd_max) ~ FALSE,
                             TRUE ~ TRUE))
  
  return(res)
}

# dat_n_11 <-
#   expand_grid(mean = seq(from = 0.5, to = 7.5, by = 0.01),
#               sd = seq(from = 0, to = 4, by = 0.01)) |>
#   mutate(n_obs   = 11,
#          m_prec  = 2,
#          sd_prec = 2,
#          n_items = 1,
#          min_val = 1,
#          max_val = 7) |>
#   mutate(grim = pmap(list(as.character(mean), n_obs),
#                      grim)) |>
#   mutate(grimmer = pmap(list(as.character(mean), as.character(sd), n_obs),
#                         grimmer)) |>
#   mutate(tides = pmap(list(n_obs, mean, sd, min_val, max_val, sd_prec, n_items),
#                       tides_modified_from_.sd_limits)) |>
#   unnest(grim) |>
#   unnest(grimmer) |>
#   unnest(tides)
# 
# dat_n_100 <-
#   expand_grid(mean = seq(from = 0.5, to = 7.5, by = 0.01),
#               sd = seq(from = 0, to = 4, by = 0.01)) |>
#   mutate(n_obs   = 100,
#          m_prec  = 2,
#          sd_prec = 2,
#          n_items = 1,
#          min_val = 1,
#          max_val = 7) |>
#   mutate(grim = pmap(list(as.character(mean), n_obs),
#                      grim)) |>
#   mutate(grimmer = pmap(list(as.character(mean), as.character(sd), n_obs),
#                         grimmer)) |>
#   mutate(tides = pmap(list(n_obs, mean, sd, min_val, max_val, sd_prec, n_items),
#                       tides_modified_from_.sd_limits)) |>
#   unnest(grim) |>
#   unnest(grimmer) |>
#   unnest(tides)
# 
# write_rds(dat_n_11, "results_grim_grimmer_tides_n_11.rds")
# write_rds(dat_n_100, "results_grim_grimmer_tides_n_100.rds")

dat_n_11 <- read_rds("results_grim_grimmer_tides_n_11.rds")
dat_n_100 <- read_rds("results_grim_grimmer_tides_n_100.rds")

N=11

plot_helper(dat_n_11, color_bounds = FALSE)

plot_helper(dat_n_11, color_bounds = TRUE)

table_helper(dat_n_11, max_sd = 3.6)
check n_total n_possible proportion
GRIM 216961 43681 0.201
GRIM+GRIMMER 216961 12797 0.059
GRIM+GRIMMER+TIDES 216961 3335 0.015

N=100

plot_helper(dat_n_100, color_bounds = FALSE)

plot_helper(dat_n_100, color_bounds = TRUE)

table_helper(dat_n_100, max_sd = 3.6)
check n_total n_possible proportion
GRIM 216961 216961 1.000
GRIM+GRIMMER 216961 196217 0.904
GRIM+GRIMMER+TIDES 216961 112273 0.517

using newly developed min_sd() and max_sd() functions

min_sd <- function(mean, n, min_score, max_score, integer_responses = TRUE) {
  # ---- 1) required total sum (rounded to an integer if the problem implies 
  # exact feasibility)
  required_sum <- n * mean
  
  # round required_sum if the problem implies the mean is exactly representable 
  # by integer responses:
  if(integer_responses){
    required_sum <- round(required_sum)
  }
  
  # feasibility check: the sum must lie between n*min_score and n*max_score 
  # for an integer distribution to exist.
  if (required_sum < n*min_score || required_sum > n*max_score) {
    stop("No distribution of [min_score, max_score] integers can achieve that mean (sum out of range).")
  }
  
  # ---- 2) check if the mean is effectively an integer within [min_score,max_score].
  # if so, the minimal SD is 0 by taking all responses = that integer.
  if (abs(required_sum - n*round(mean)) < 1e-9 && round(mean) >= min_score && round(mean) <= max_score) {
    # i.e. if mean was effectively an integer in the feasible range
    # check if n * round(mean) == required_sum
    if (round(mean) * n == required_sum) {
      # all responses equal to that integer
      dist <- rep(round(mean), n)
      return(data.frame(min_sd = 0))
    }
  }
  
  # ---- 3) otherwise, we attempt to use two adjacent integers L and L+1
  # let L = floor(mean), clamped to [min_score, max_score].
  L_init <- floor(mean)
  if (L_init < min_score) L_init <- min_score
  if (L_init > max_score) L_init <- max_score
  
  # we'll define a small helper to build a distribution given L
  # and return if feasible:
  build_dist_from_L <- function(L, required_sum, n, min_score, max_score) {
    if (L < min_score || L > max_score) {
      return(NULL)
    }
    # leftover = how many times we need (L+1)
    leftover <- required_sum - n*L
    if (leftover == 0) {
      # all L
      return(rep(L, n))
    } else if (leftover > 0 && leftover <= n) {
      # leftover data points (L+1), the rest are L
      # but only if (L+1) <= max_score
      if ((L + 1) <= max_score) {
        return(c(rep(L, n - leftover), rep(L + 1, leftover)))
      } else {
        return(NULL)
      }
    } else {
      # leftover < 0 or leftover > n => not feasible with L and L+1
      return(NULL)
    }
  }
  
  # try L_init directly:
  dist <- build_dist_from_L(L_init, required_sum, n, min_score, max_score)
  if (is.null(dist)) {
    # if that didn't work, try adjusting L_init up or down by 1 step
    # (sometimes floor(mean) is not the right choice if leftover < 0 or > n).
    
    # let's define a small search around L_init:
    candidates <- unique(c(L_init - 1, L_init, L_init + 1))
    candidates <- candidates[candidates >= min_score & candidates <= max_score]
    
    found <- FALSE
    for (L_try in candidates) {
      dist_try <- build_dist_from_L(L_try, required_sum, n, min_score, max_score)
      if (!is.null(dist_try)) {
        dist <- dist_try
        found <- TRUE
        break
      }
    }
    
    if (!found) {
      # possibly no solution with 2 adjacent integers => no feasible distribution
      stop("Could not construct a minimal-variance distribution with two adjacent integers.")
    }
  }
  
  # if we reach here, 'dist' is a valid distribution
  # ---- 4) compute sample SD in R (with denominator n-1)
  min_sd <- sd(dist)
  
  data.frame(min_sd = min_sd)
}

max_sd <- function(mean, n, min_score, max_score, integer_responses = TRUE) {
  # 1) required sum
  required_sum <- n * mean
  
  # check: is required_sum in feasible range [n*min_score, n*max_score]?
  if (required_sum < n*min_score - 1e-9 || required_sum > n*max_score + 1e-9) {
    stop("No integer distribution can achieve this mean with responses in [min_score,max_score].")
  }
  
  # round required_sum if the problem implies the mean is exactly 
  # representable by integer responses:
  if(integer_responses){
    required_sum <- round(required_sum)
  }
  
  # 2) solve for x 
  x <- floor((required_sum - n*min_score) / (max_score - min_score))
  x <- max(0, min(x, n))  # keep it in [0, n]
  
  # 3) leftover = required_sum - [x*max_score + (n-x)*min_score]
  delta <- required_sum - (x*max_score + (n - x)*min_score)
  if (delta < 0 || delta >= (max_score - min_score)) {
    stop("Something went wrong with leftover. Check input.")
  }
  
  # 4) sum of squares around mean:
  
  # part A: from x responses at max_score
  ss_max_score <- x * (max_score - mean)^2
  
  # part B: from (n - x - 1) responses at min_score (if we do have a leftover) 
  # or from (n - x) responses at min_score (if no leftover)
  if (delta == 0) {
    # no middle value
    ss_min_score <- (n - x) * (min_score - mean)^2
    ss_m <- 0
  } else {
    ss_min_score <- (n - x - 1) * (min_score - mean)^2
    middle_val <- min_score + delta
    ss_m <- (middle_val - mean)^2
  }
  
  # total sum of squares
  ss_total <- ss_max_score + ss_min_score + ss_m
  
  # sample variance = ss_total / (n - 1)
  var_max <- ss_total / (n - 1)
  sd_max  <- sqrt(var_max)
  
  data.frame(max_sd = sd_max)
}

# dat_n_11_new <-
#   expand_grid(mean = seq(from = 0.5, to = 7.5, by = 0.01),
#               sd = seq(from = 0, to = 4, by = 0.01)) |>
#   mutate(n_obs   = 11,
#          m_prec  = 2,
#          sd_prec = 2,
#          n_items = 1,
#          min_val = 1,
#          max_val = 7) |>
#   mutate(grim = pmap(list(as.character(mean), n_obs),
#                      grim)) |>
#   unnest(grim) |>
#   mutate(grimmer = pmap(list(as.character(mean), as.character(sd), n_obs),
#                         grimmer)) |>
#   unnest(grimmer) |>
#   mutate(tides_max = pmap(list(mean, n_obs, min_val, max_val),
#                           possibly(max_sd, otherwise = NA))) |>
#   unnest(tides_max) |>
#   mutate(tides_min = pmap(list(mean, n_obs, min_val, max_val),
#                           possibly(min_sd, otherwise = NA))) |>
#   unnest(tides_min) |>
#   mutate(tides = case_when(sd <= max_sd & sd >= min_sd ~ TRUE,
#                            TRUE ~ FALSE))
# 
# dat_n_100_new <-
#   expand_grid(mean = seq(from = 0.5, to = 7.5, by = 0.01),
#               sd = seq(from = 0, to = 4, by = 0.01)) |>
#   mutate(n_obs   = 100,
#          m_prec  = 2,
#          sd_prec = 2,
#          n_items = 1,
#          min_val = 1,
#          max_val = 7) |>
#   mutate(grim = pmap(list(as.character(mean), n_obs),
#                      grim)) |>
#   unnest(grim) |>
#   mutate(grimmer = pmap(list(as.character(mean), as.character(sd), n_obs),
#                         grimmer)) |>
#   unnest(grimmer) |>
#   mutate(tides_max = pmap(list(mean, n_obs, min_val, max_val),
#                           possibly(max_sd, otherwise = NA))) |>
#   unnest(tides_max) |>
#   mutate(tides_min = pmap(list(mean, n_obs, min_val, max_val),
#                           possibly(min_sd, otherwise = NA))) |>
#   unnest(tides_min) |> |>
#   mutate(tides = case_when(sd <= round(max_sd, 2) & sd >= round(min_sd, 2) ~ TRUE,
#                            TRUE ~ FALSE))
# 
# write_rds(dat_n_11_new, "results_grim_grimmer_tides_n_11_new.rds")
# write_rds(dat_n_100_new, "results_grim_grimmer_tides_n_100_new.rds")

dat_n_11_new <- read_rds("results_grim_grimmer_tides_n_11_new.rds")
dat_n_100_new <- read_rds("results_grim_grimmer_tides_n_100_new.rds")

N=11

plot_helper(dat_n_11_new, color_bounds = FALSE)

plot_helper(dat_n_11_new, color_bounds = TRUE)

table_helper(dat_n_11_new, max_sd = 3.6)
check n_total n_possible proportion
GRIM 139631 27925 0.200
GRIM+GRIMMER 139631 6397 0.046
GRIM+GRIMMER+TIDES 139631 6035 0.043

N=100

plot_helper(dat_n_100_new, color_bounds = FALSE)

plot_helper(dat_n_100_new, color_bounds = TRUE)

table_helper(dat_n_100_new, max_sd = 3.6)
check n_total n_possible proportion
GRIM 141466 141466 1.000
GRIM+GRIMMER 141466 120994 0.855
GRIM+GRIMMER+TIDES 141466 112031 0.792

using tides package min_sd() and max_sd() functions

NB added rounding for checks

min_sd <- function(mean, n) {
  # determine the closest integer values around the mean
  lower_value <- floor(mean)
  upper_value <- ceiling(mean)
  
  # calculate frequencies required to achieve the mean
  total_sum <- n * mean
  lower_count <- n * (upper_value - mean)
  upper_count <- n * (mean - lower_value)
  
  # if the calculated counts are not integers, round them appropriately
  lower_count <- round(lower_count)
  upper_count <- round(upper_count)
  
  # ensure the total count equals n
  if (lower_count + upper_count != n) {
    lower_count <- n - upper_count
  }
  
  # form the combination of values
  values <- c(rep(lower_value, lower_count), rep(upper_value, upper_count))
  
  # calculate the mean and standard deviation
  #actual_mean <- mean(values) 
  min_sd <- sd(values)
  
  #return(list(actual_mean = actual_mean, min_sd = min_sd))
  return(min_sd)
}

max_sd <- function(mean, n, min, max){
  
  # handle edge cases where the mean is at the minimum or maximum
  if (mean == min || mean == max) {
    max_sd <- 0
  } else {
    
    # mirror values for special cases to ensure min < max
    if (abs(min) > abs(max)) {
      temp <- min
      min <- max
      max <- temp
    }
    
    # calculate the number of observations at the maximum value
    num_max <- floor((n * mean - n * min) / (max - min))
    
    # calculate the number of observations at the minimum value
    num_min <- n - 1 - num_max
    
    # adjust max if num_max is 0
    if (num_max == 0) {
      max <- 0
    }
    
    # compute the sum of means adjusted by the number of min and max values
    adjusted_mean_sum <- n * mean - num_min * min - num_max * max
    
    # compute maximum variability
    max_variability <- (num_min * (min - mean)^2 + num_max * (max - mean)^2 + (mean - adjusted_mean_sum)^2) / (n - 1)
    
    # calculate the maximum standard deviation
    max_sd <- sqrt(max_variability)
  }
  
  return(max_sd)
}

tides_single <- function(mean, sd, n, min, max, calculate_min_sd = TRUE, verbose = FALSE){
  
  # calculate the maximum standard deviation
  max_sd <- max_sd(mean, n, min, max)
  
  # calculate the minimum standard deviation using the provided function
  if (calculate_min_sd) {
    min_sd <- min_sd(mean, n)
  } else {
    min_sd <- NA
  }
  
  # standardize the mean and SD based on the max possible range
  standardized_mean <- (mean - min) / (max - min)
  max_standardized_sd <- ( sqrt( (standardized_mean) * (1 - (standardized_mean)) ) ) * ( sqrt(n / (n - 1)) )
  standardized_sd <- sd / max_sd
  
  # create the result tibble based on verbosity
  if (verbose) {
    res <- tibble(mean = mean,
                  sd = sd,
                  n = n,
                  min = min,
                  max = max,
                  standardized_mean = standardized_mean,
                  standardized_sd = standardized_sd,
                  max_standardized_sd = max_standardized_sd,
                  min_sd = round(min_sd, 2),
                  max_sd = round(max_sd, 2),
                  tides = case_when(sd > max_sd | (sd < min_sd | is.na(min_sd)) ~ FALSE,
                                    sd <= max_sd & (sd >= min_sd | is.na(min_sd)) ~ TRUE))
  } else if (!verbose){
    res <- tibble(standardized_mean = standardized_mean,
                  standardized_sd = standardized_sd,
                  max_standardized_sd = max_standardized_sd,
                  min_sd = round(min_sd, 2),
                  max_sd = round(max_sd, 2),
                  tides = case_when(sd > max_sd | (sd < min_sd | is.na(min_sd)) ~ FALSE,
                                    sd <= max_sd & (sd >= min_sd | is.na(min_sd)) ~ TRUE))
  }
  
  return(res)
}

# dat_n_11_package <-
#   expand_grid(mean = seq(from = 0.5, to = 7.5, by = 0.01),
#               sd = seq(from = 0, to = 4, by = 0.01)) |>
#   mutate(n_obs   = 11,
#          m_prec  = 2,
#          sd_prec = 2,
#          n_items = 1,
#          min_val = 1,
#          max_val = 7) |>
#   mutate(grim = pmap(list(as.character(mean), n_obs),
#                      grim)) |>
#   unnest(grim) |>
#   mutate(grimmer = pmap(list(as.character(mean), as.character(sd), n_obs),
#                         grimmer)) |>
#   unnest(grimmer) |>
#   mutate(tides = pmap(list(mean, sd, n_obs, min_val, max_val),
#                           possibly(tides_single, otherwise = NA))) |>
#   unnest(tides)
# 
# dat_n_100_package <-
#   expand_grid(mean = seq(from = 0.5, to = 7.5, by = 0.01),
#               sd = seq(from = 0, to = 4, by = 0.01)) |>
#   mutate(n_obs   = 100,
#          m_prec  = 2,
#          sd_prec = 2,
#          n_items = 1,
#          min_val = 1,
#          max_val = 7) |>
#   mutate(grim = pmap(list(as.character(mean), n_obs),
#                      grim)) |>
#   unnest(grim) |>
#   mutate(grimmer = pmap(list(as.character(mean), as.character(sd), n_obs),
#                         grimmer)) |>
#   unnest(grimmer) |>
#   mutate(tides = pmap(list(mean, sd, n_obs, min_val, max_val),
#                           possibly(tides_single, otherwise = NA))) |>
#   unnest(tides)
#
# write_rds(dat_n_11_package, "results_grim_grimmer_tides_n_11_package.rds")
# write_rds(dat_n_100_package, "results_grim_grimmer_tides_n_100_package.rds")

dat_n_11_package <- read_rds("results_grim_grimmer_tides_n_11_package.rds")
dat_n_100_package <- read_rds("results_grim_grimmer_tides_n_100_package.rds")

N=11

plot_helper(dat_n_11_package, color_bounds = FALSE)

plot_helper(dat_n_11_package, color_bounds = TRUE)

table_helper(dat_n_11_package, max_sd = 3.6)
check n_total n_possible proportion
GRIM 139589 27995 0.201
GRIM+GRIMMER 139589 6395 0.046
GRIM+GRIMMER+TIDES 139589 6041 0.043

N=100

plot_helper(dat_n_100_package, color_bounds = FALSE)

plot_helper(dat_n_100_package, color_bounds = TRUE)

table_helper(dat_n_100_package, max_sd = 3.6)
check n_total n_possible proportion
GRIM 141673 141673 1.000
GRIM+GRIMMER 141673 121201 0.855
GRIM+GRIMMER+TIDES 141673 112273 0.792

Compare

# identical(dat_n_11, dat_n_11_new)
# 
# all.equal(dat_n_11, dat_n_11_new)
# 
# colnames(dat_n_11)

dat_n_11_combined <- 
  rename(dat_n_11, 
         max_sd_.sd_limits = sd_max, 
         min_sd_.sd_limits = sd_min,
         tides_modified_from_.sd_limits = tides) |>
  full_join(rename(dat_n_11_new, 
                   max_sd_new = max_sd, 
                   min_sd_new = min_sd,
                   tides_new = tides),
            by = c("mean", "sd", "n_obs", "m_prec", "sd_prec", "n_items", "min_val", "max_val")) |>
  full_join(rename(dat_n_11_package, 
                   max_sd_package = max_sd, 
                   min_sd_package = min_sd,
                   tides_package = tides),
            by = c("mean", "sd", "n_obs", "m_prec", "sd_prec", "n_items", "min_val", "max_val")) |>
  mutate(max_sd_new_rounded = round(max_sd_new, 2),
         min_sd_new_rounded = round(min_sd_new, 2),
         max_sd_package = case_when(is.nan(max_sd_package) ~ NA, 
                                    TRUE ~ max_sd_package),
         # grim_match = grim.x == grim.y,
         # grimmer_match = grimmer.x == grimmer.y,
         # tides_match = tides_modified_from_.sd_limits == tides_new,
         min_sd_match = case_when(min_sd_.sd_limits == min_sd_new_rounded & min_sd_new_rounded == min_sd_package ~ TRUE,
                                  TRUE ~ FALSE),
         max_sd_match = case_when(max_sd_.sd_limits == max_sd_new_rounded & max_sd_new_rounded == max_sd_package ~ TRUE,
                                  TRUE ~ FALSE),
         tides_match = case_when(tides_modified_from_.sd_limits == tides_new & tides_new == tides_package ~ TRUE,
                                 TRUE ~ FALSE))

# dat_n_11_combined |>
#   count(tides_match)

# dat_n_11_combined |>
#   count(min_sd_match)

# dat_n_11_combined |>
#   count(grim_match)
# 
# dat_n_11_combined |>
#   count(grimmer_match)

# dat_n_11_combined |>
#   count(tides_match)

# dat_n_11_combined |>
#   count(max_sd_match)

res_comparisons <- dat_n_11_combined |>
  select(mean, 
         sd, 
         #n_obs, 
         min_sd_.sd_limits, 
         min_sd_new = min_sd_new_rounded, 
         min_sd_package,
         min_sd_match,
         max_sd_.sd_limits, 
         max_sd_new = max_sd_new_rounded,
         max_sd_package,
         max_sd_match,
         grim.x, 
         #grim.y, 
         grimmer.x, 
         #grimmer.y, 
         tides_modified_from_.sd_limits, 
         tides_new,
         tides_package,
         tides_match) |>
  filter(!is.na(min_sd_.sd_limits) | 
           !is.na(min_sd_new) | 
           !is.na(min_sd_package) | 
           !is.na(max_sd_.sd_limits) | 
           !is.na(max_sd_new) |
           !is.na(max_sd_package)) |>
  distinct(min_sd_.sd_limits, 
           min_sd_new, 
           min_sd_package,
           max_sd_.sd_limits, 
           max_sd_new, 
           max_sd_package,
           grim.x, 
           grimmer.x, 
           tides_modified_from_.sd_limits, 
           tides_new,
           tides_package,
           tides_match,
           .keep_all = TRUE) |>
  filter(grim.x, grimmer.x)

# no rows have:
res_comparisons |>
  filter(tides_modified_from_.sd_limits == TRUE & tides_new == FALSE)
## # A tibble: 0 × 16
## # ℹ 16 variables: mean <dbl>, sd <dbl>, min_sd_.sd_limits <dbl>,
## #   min_sd_new <dbl>, min_sd_package <dbl>, min_sd_match <lgl>,
## #   max_sd_.sd_limits <dbl>, max_sd_new <dbl>, max_sd_package <dbl>,
## #   max_sd_match <lgl>, grim.x <lgl>, grimmer.x <lgl>,
## #   tides_modified_from_.sd_limits <lgl>, tides_new <lgl>, tides_package <lgl>,
## #   tides_match <lgl>
# many rows have:
res_comparisons |>
  filter(tides_modified_from_.sd_limits == FALSE & tides_new == TRUE)
## # A tibble: 35 × 16
##     mean    sd min_sd_.sd_limits min_sd_new min_sd_package min_sd_match
##    <dbl> <dbl>             <dbl>      <dbl>          <dbl> <lgl>       
##  1   1.1  0.3                 NA       0.3            0.3  FALSE       
##  2   1.2  0.4                 NA       0.4            0.4  FALSE       
##  3   1.3  0.47                NA       0.47           0.47 FALSE       
##  4   1.4  0.5                 NA       0.5            0.5  FALSE       
##  5   1.5  0.52                NA       0.52           0.52 FALSE       
##  6   1.6  0.5                 NA       0.5            0.5  FALSE       
##  7   1.7  0.47                NA       0.47           0.47 FALSE       
##  8   1.8  0.4                 NA       0.4            0.4  FALSE       
##  9   1.9  0.3                 NA       0.3            0.3  FALSE       
## 10   1.9  2.07                NA       0.3            0.3  FALSE       
## # ℹ 25 more rows
## # ℹ 10 more variables: max_sd_.sd_limits <dbl>, max_sd_new <dbl>,
## #   max_sd_package <dbl>, max_sd_match <lgl>, grim.x <lgl>, grimmer.x <lgl>,
## #   tides_modified_from_.sd_limits <lgl>, tides_new <lgl>, tides_package <lgl>,
## #   tides_match <lgl>
temp <- res_comparisons |>
  filter(!is.na(min_sd_.sd_limits)) |>
  distinct(min_sd_match, max_sd_match, tides_match,
           .keep_all = TRUE)
  • view res_comparisons for examples of impossible scores and disagreements between methods

todo

  • newly developed functions dont taking precision or rounding into account
  • wheres the code for the POMP plots? - in the /dev/relative folder

testing

  • min_sd() the new function returns min_sd = 0 when the required mean is less than the scale minimum, where the other functions return NA.